**************************
***	RUN USING STATA 16 ***
**************************

* Purpose: calculate average Maths exam difficulty by year using classical test theory, output table.
* Last Updated: 06 May 2021

*********************************
*** MATHS (regression setup)  ***
*********************************

	* Build the item-level (long) Maths dataset used by the regressions:
	* one row per pupil x booklet x item, with the item response plus
	* item type, item id and exam year derived from the item's variable name.
	use "$output\mat_unique.dta", clear

	*four students have sub_only data but no pid
	* Drop them BEFORE reshaping: reshape needs i(pid booklet) to identify each
	* row uniquely, so rows with missing pid that share a booklet would abort it.
	* missing() also catches extended missing values (.a-.z).
	drop if missing(pid)

	*rename items
	* Give objective (o_) and subjective (s_) items a common "item" stub so that
	* a single reshape can stack them.
	ren o_* item_o_*
	ren s_* item_s_*

	* After the reshape, it holds the text that followed the "item" stub,
	* i.e. "_o_<suffix>" or "_s_<suffix>".
	reshape long item, i(pid booklet) j(it) string
	ren item response

	*remove TIMSS items
	* These are the items whose name suffix is exactly 7 characters long.
	drop if strlen(it)==7

	*adjust item names to force order
	* Character 4 of it is the first character after "_o_" or "_s_"; it is used
	* as the year code. Prefixing it to the item name makes the alphabetical
	* order used by encode (below) sort items by year first.
	gen yr = substr(it, 4, 1)
	egen order = concat(yr it)

	*gen type
	* Character 2 of it is the item-type letter. The variable name is written
	* out in full (not abbreviated) so this does not depend on varabbrev being
	* on, or on type being the only variable whose name starts with "t".
	gen byte type = 1 if substr(it, 2, 1)=="o"
	replace type = 2 if substr(it, 2, 1)=="s"
	* Every reshaped item should be either objective or subjective.
	assert !missing(type)
	lab def type 1 "objective" 2 "subjective"
	lab values type type

	*encode items
	encode order, gen(item)
	encode yr, gen(year)
	drop it yr order

	* NOTE(review): encode numbers the year codes 1, 2, ... in sorted order, so
	* the labels below are only right if all nine year codes occur in the data
	* (a missing year would shift every later label) - confirm with: tab year
	lab define year 1 "2011" 2 "2012" 3 "2013" 4 "2014" 5 "2015" 6 "2016" 7 "2017" 8 "2018" 9 "2019", modify
	
	* Item-level regressions of response on year dummies, exported as one
	* LaTeX table. Base category is year 5 (2015), chosen because it has the
	* lowest pass rate in the WAEC data.
	*   col 1: no fixed effects
	*   col 2: booklet fixed effects
	*   col 3: booklet and pupil fixed effects
	* hasbook and haspupil are stored with each model so the table can print
	* Yes or No rows for the fixed effects.
	eststo clear

	* (1) pooled OLS
	eststo: regress response ib5.year
	estadd local hasbook  "No"
	estadd local haspupil "No"

	* (2) absorbing booklet
	eststo: reghdfe response ib5.year, absorb(booklet)
	estadd local hasbook  "Yes"
	estadd local haspupil "No"

	* (3) absorbing booklet and pupil
	eststo: reghdfe response ib5.year, absorb(booklet pid)
	estadd local hasbook  "Yes"
	estadd local haspupil "Yes"

	* Write all stored models to the tex table (3-decimal coefficients and SEs)
	esttab using "$table\mat_tab1.tex", replace ///
		b(3) se(3) label ///
		title("Item-level responses on year dummies") ///
		nogaps nomtitles booktabs ///
		star(* 0.10 ** 0.05 *** 0.01) ///
		scalars("hasbook Booklet FE" "haspupil Pupil FE")
	